import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the flood-risk dataset (50,000 rows: 20 int64 feature columns plus the
# continuous FloodProbability target) and echo it for a first look.
df = pd.read_csv('flood.csv')
print(df)
MonsoonIntensity TopographyDrainage RiverManagement Deforestation \
0 3 8 6 6
1 8 4 5 7
2 3 10 4 1
3 4 4 2 7
4 3 7 5 2
... ... ... ... ...
49995 3 7 4 7
49996 3 10 3 8
49997 4 4 5 7
49998 4 5 4 4
49999 4 5 6 3
Urbanization ClimateChange DamsQuality Siltation \
0 4 4 6 2
1 7 9 1 5
2 7 5 4 7
3 3 4 1 4
4 5 8 5 2
... ... ... ... ...
49995 5 9 4 6
49996 3 3 4 4
49997 2 1 4 5
49998 6 3 10 2
49999 5 6 5 4
AgriculturalPractices Encroachments ... DrainageSystems \
0 3 2 ... 10
1 5 4 ... 9
2 4 9 ... 7
3 6 4 ... 4
4 7 5 ... 7
... ... ... ... ...
49995 10 4 ... 7
49996 3 11 ... 8
49997 6 7 ... 4
49998 6 11 ... 6
49999 9 10 ... 2
CoastalVulnerability Landslides Watersheds \
0 7 4 2
1 2 6 2
2 4 4 8
3 2 6 6
4 6 5 3
... ... ... ...
49995 3 8 8
49996 6 3 6
49997 6 4 1
49998 3 4 7
49999 4 4 5
DeterioratingInfrastructure PopulationScore WetlandLoss \
0 3 4 3
1 1 1 9
2 6 1 8
3 8 8 6
4 3 4 4
... ... ... ...
49995 6 1 5
49996 4 4 2
49997 5 1 6
49998 6 2 4
49999 6 7 8
InadequatePlanning PoliticalFactors FloodProbability
0 2 6 0.450
1 1 3 0.475
2 3 6 0.515
3 6 10 0.520
4 3 4 0.475
... ... ... ...
49995 4 2 0.535
49996 4 5 0.510
49997 4 3 0.430
49998 0 11 0.515
49999 10 7 0.580
[50000 rows x 21 columns]
# First five rows (bare expression so the notebook renders it as a table).
df.head()
| MonsoonIntensity | TopographyDrainage | RiverManagement | Deforestation | Urbanization | ClimateChange | DamsQuality | Siltation | AgriculturalPractices | Encroachments | ... | DrainageSystems | CoastalVulnerability | Landslides | Watersheds | DeterioratingInfrastructure | PopulationScore | WetlandLoss | InadequatePlanning | PoliticalFactors | FloodProbability | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3 | 8 | 6 | 6 | 4 | 4 | 6 | 2 | 3 | 2 | ... | 10 | 7 | 4 | 2 | 3 | 4 | 3 | 2 | 6 | 0.450 |
| 1 | 8 | 4 | 5 | 7 | 7 | 9 | 1 | 5 | 5 | 4 | ... | 9 | 2 | 6 | 2 | 1 | 1 | 9 | 1 | 3 | 0.475 |
| 2 | 3 | 10 | 4 | 1 | 7 | 5 | 4 | 7 | 4 | 9 | ... | 7 | 4 | 4 | 8 | 6 | 1 | 8 | 3 | 6 | 0.515 |
| 3 | 4 | 4 | 2 | 7 | 3 | 4 | 1 | 4 | 6 | 4 | ... | 4 | 2 | 6 | 6 | 8 | 8 | 6 | 6 | 10 | 0.520 |
| 4 | 3 | 7 | 5 | 2 | 5 | 8 | 5 | 2 | 7 | 5 | ... | 7 | 6 | 5 | 3 | 3 | 4 | 4 | 3 | 4 | 0.475 |
5 rows × 21 columns
# (rows, columns) of the dataset.
df.shape
(50000, 21)
# Column dtypes, non-null counts and memory footprint.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50000 entries, 0 to 49999 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 MonsoonIntensity 50000 non-null int64 1 TopographyDrainage 50000 non-null int64 2 RiverManagement 50000 non-null int64 3 Deforestation 50000 non-null int64 4 Urbanization 50000 non-null int64 5 ClimateChange 50000 non-null int64 6 DamsQuality 50000 non-null int64 7 Siltation 50000 non-null int64 8 AgriculturalPractices 50000 non-null int64 9 Encroachments 50000 non-null int64 10 IneffectiveDisasterPreparedness 50000 non-null int64 11 DrainageSystems 50000 non-null int64 12 CoastalVulnerability 50000 non-null int64 13 Landslides 50000 non-null int64 14 Watersheds 50000 non-null int64 15 DeterioratingInfrastructure 50000 non-null int64 16 PopulationScore 50000 non-null int64 17 WetlandLoss 50000 non-null int64 18 InadequatePlanning 50000 non-null int64 19 PoliticalFactors 50000 non-null int64 20 FloodProbability 50000 non-null float64 dtypes: float64(1), int64(20) memory usage: 8.0 MB
# dtype of every column: 20 int64 features plus the float64 target.
print(df.dtypes)
MonsoonIntensity int64 TopographyDrainage int64 RiverManagement int64 Deforestation int64 Urbanization int64 ClimateChange int64 DamsQuality int64 Siltation int64 AgriculturalPractices int64 Encroachments int64 IneffectiveDisasterPreparedness int64 DrainageSystems int64 CoastalVulnerability int64 Landslides int64 Watersheds int64 DeterioratingInfrastructure int64 PopulationScore int64 WetlandLoss int64 InadequatePlanning int64 PoliticalFactors int64 FloodProbability float64 dtype: object
# Summary statistics; per the output below, features are integer scores
# starting at 0 and FloodProbability lies in [0.285, 0.725].
df.describe()
| MonsoonIntensity | TopographyDrainage | RiverManagement | Deforestation | Urbanization | ClimateChange | DamsQuality | Siltation | AgriculturalPractices | Encroachments | ... | DrainageSystems | CoastalVulnerability | Landslides | Watersheds | DeterioratingInfrastructure | PopulationScore | WetlandLoss | InadequatePlanning | PoliticalFactors | FloodProbability | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 50000.000000 | 50000.000000 | 50000.00000 | 50000.000000 | 50000.000000 | 50000.000000 | 50000.00000 | 50000.000000 | 50000.000000 | 50000.000000 | ... | 50000.000000 | 50000.000000 | 50000.000000 | 50000.00000 | 50000.000000 | 50000.000000 | 50000.00000 | 50000.000000 | 50000.000000 | 50000.000000 |
| mean | 4.991480 | 4.984100 | 5.01594 | 5.008480 | 4.989060 | 4.988340 | 5.01536 | 4.988600 | 5.006120 | 5.006380 | ... | 5.006060 | 4.999920 | 4.984220 | 4.97982 | 4.988200 | 4.984980 | 5.00512 | 4.994360 | 4.990520 | 0.499660 |
| std | 2.236834 | 2.246488 | 2.23131 | 2.222743 | 2.243159 | 2.226761 | 2.24500 | 2.232642 | 2.234588 | 2.241633 | ... | 2.238107 | 2.247101 | 2.227741 | 2.23219 | 2.231134 | 2.238279 | 2.23176 | 2.230011 | 2.246075 | 0.050034 |
| min | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.285000 |
| 25% | 3.000000 | 3.000000 | 3.00000 | 3.000000 | 3.000000 | 3.000000 | 3.00000 | 3.000000 | 3.000000 | 3.000000 | ... | 3.000000 | 3.000000 | 3.000000 | 3.00000 | 3.000000 | 3.000000 | 3.00000 | 3.000000 | 3.000000 | 0.465000 |
| 50% | 5.000000 | 5.000000 | 5.00000 | 5.000000 | 5.000000 | 5.000000 | 5.00000 | 5.000000 | 5.000000 | 5.000000 | ... | 5.000000 | 5.000000 | 5.000000 | 5.00000 | 5.000000 | 5.000000 | 5.00000 | 5.000000 | 5.000000 | 0.500000 |
| 75% | 6.000000 | 6.000000 | 6.00000 | 6.000000 | 6.000000 | 6.000000 | 6.00000 | 6.000000 | 6.000000 | 6.000000 | ... | 6.000000 | 6.000000 | 6.000000 | 6.00000 | 6.000000 | 6.000000 | 6.00000 | 6.000000 | 6.000000 | 0.535000 |
| max | 16.000000 | 18.000000 | 16.00000 | 17.000000 | 17.000000 | 17.000000 | 16.00000 | 16.000000 | 16.000000 | 18.000000 | ... | 17.000000 | 17.000000 | 16.000000 | 16.00000 | 17.000000 | 19.000000 | 22.00000 | 16.000000 | 16.000000 | 0.725000 |
8 rows × 21 columns
# True if any cell anywhere in the frame is missing.
df.isnull().values.any()
False
# Total count of missing cells across the whole frame.
df.isnull().values.sum()
0
# Missing-value count per column (all zero for this dataset, per the output).
df.isnull().sum()
MonsoonIntensity 0 TopographyDrainage 0 RiverManagement 0 Deforestation 0 Urbanization 0 ClimateChange 0 DamsQuality 0 Siltation 0 AgriculturalPractices 0 Encroachments 0 IneffectiveDisasterPreparedness 0 DrainageSystems 0 CoastalVulnerability 0 Landslides 0 Watersheds 0 DeterioratingInfrastructure 0 PopulationScore 0 WetlandLoss 0 InadequatePlanning 0 PoliticalFactors 0 FloodProbability 0 dtype: int64
# Full list of column names.
print(df.columns)
Index(['MonsoonIntensity', 'TopographyDrainage', 'RiverManagement',
'Deforestation', 'Urbanization', 'ClimateChange', 'DamsQuality',
'Siltation', 'AgriculturalPractices', 'Encroachments',
'IneffectiveDisasterPreparedness', 'DrainageSystems',
'CoastalVulnerability', 'Landslides', 'Watersheds',
'DeterioratingInfrastructure', 'PopulationScore', 'WetlandLoss',
'InadequatePlanning', 'PoliticalFactors', 'FloodProbability'],
dtype='object')
# Last ten rows of the dataset.
print(df.tail(n=10))
MonsoonIntensity TopographyDrainage RiverManagement Deforestation \
49990 5 8 3 7
49991 3 10 1 3
49992 5 8 7 6
49993 4 4 5 5
49994 6 5 3 5
49995 3 7 4 7
49996 3 10 3 8
49997 4 4 5 7
49998 4 5 4 4
49999 4 5 6 3
Urbanization ClimateChange DamsQuality Siltation \
49990 3 5 2 8
49991 2 2 6 5
49992 6 6 4 2
49993 13 7 2 7
49994 9 4 6 6
49995 5 9 4 6
49996 3 3 4 4
49997 2 1 4 5
49998 6 3 10 2
49999 5 6 5 4
AgriculturalPractices Encroachments ... DrainageSystems \
49990 12 11 ... 2
49991 6 4 ... 6
49992 7 6 ... 5
49993 10 5 ... 6
49994 3 5 ... 6
49995 10 4 ... 7
49996 3 11 ... 8
49997 6 7 ... 4
49998 6 11 ... 6
49999 9 10 ... 2
CoastalVulnerability Landslides Watersheds \
49990 6 5 4
49991 3 3 3
49992 1 8 3
49993 9 0 7
49994 3 6 8
49995 3 8 8
49996 6 3 6
49997 6 4 1
49998 3 4 7
49999 4 4 5
DeterioratingInfrastructure PopulationScore WetlandLoss \
49990 5 5 6
49991 4 2 7
49992 6 4 5
49993 4 5 6
49994 2 9 7
49995 6 1 5
49996 4 4 2
49997 5 1 6
49998 6 2 4
49999 6 7 8
InadequatePlanning PoliticalFactors FloodProbability
49990 4 4 0.580
49991 5 4 0.435
49992 3 7 0.520
49993 3 0 0.525
49994 5 4 0.535
49995 4 2 0.535
49996 4 5 0.510
49997 4 3 0.430
49998 0 11 0.515
49999 10 7 0.580
[10 rows x 21 columns]
# Pairwise correlation matrix of all columns.
# FIX: the original line was `df.corr` with no parentheses, which merely
# displays the bound method object (see the dump that followed it) instead
# of computing anything -- the call operator was missing.
df.corr()
<bound method DataFrame.corr of MonsoonIntensity TopographyDrainage RiverManagement Deforestation \
0 3 8 6 6
1 8 4 5 7
2 3 10 4 1
3 4 4 2 7
4 3 7 5 2
... ... ... ... ...
49995 3 7 4 7
49996 3 10 3 8
49997 4 4 5 7
49998 4 5 4 4
49999 4 5 6 3
Urbanization ClimateChange DamsQuality Siltation \
0 4 4 6 2
1 7 9 1 5
2 7 5 4 7
3 3 4 1 4
4 5 8 5 2
... ... ... ... ...
49995 5 9 4 6
49996 3 3 4 4
49997 2 1 4 5
49998 6 3 10 2
49999 5 6 5 4
AgriculturalPractices Encroachments ... DrainageSystems \
0 3 2 ... 10
1 5 4 ... 9
2 4 9 ... 7
3 6 4 ... 4
4 7 5 ... 7
... ... ... ... ...
49995 10 4 ... 7
49996 3 11 ... 8
49997 6 7 ... 4
49998 6 11 ... 6
49999 9 10 ... 2
CoastalVulnerability Landslides Watersheds \
0 7 4 2
1 2 6 2
2 4 4 8
3 2 6 6
4 6 5 3
... ... ... ...
49995 3 8 8
49996 6 3 6
49997 6 4 1
49998 3 4 7
49999 4 4 5
DeterioratingInfrastructure PopulationScore WetlandLoss \
0 3 4 3
1 1 1 9
2 6 1 8
3 8 8 6
4 3 4 4
... ... ... ...
49995 6 1 5
49996 4 4 2
49997 5 1 6
49998 6 2 4
49999 6 7 8
InadequatePlanning PoliticalFactors FloodProbability
0 2 6 0.450
1 1 3 0.475
2 3 6 0.515
3 6 10 0.520
4 3 4 0.475
... ... ... ...
49995 4 2 0.535
49996 4 5 0.510
49997 4 3 0.430
49998 0 11 0.515
49999 10 7 0.580
[50000 rows x 21 columns]>
# Visualize the pairwise correlations as one large annotated heatmap.
corr_matrix = df.corr()
heat_fig, heat_ax = plt.subplots(figsize=(20, 15))
sns.heatmap(corr_matrix, annot=True, ax=heat_ax, cmap='coolwarm')
plt.show()
# Re-import pyplot under the alias `grph`; later cells reuse this alias.
from matplotlib import pyplot as grph
# One histogram per column for a quick look at each distribution.
df.hist(figsize=(18, 14), color='blue', edgecolor='black')
grph.tight_layout()
grph.show()
# Model-selection imports, deduplicated: in the original cell train_test_split,
# LogisticRegression and the regression metrics were each imported twice.
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Placeholder classifier; it is never fitted in this cell. (Fitting it on the
# continuous FloodProbability values would fail -- LogisticRegression expects
# discrete class labels; later cells binarize the target first.)
model = LogisticRegression()

# Features = every column except the target.
X = df.copy()
X.drop(columns=['FloodProbability'], inplace=True)
# Target as a 1-D Series. FIX: the original double-bracket selection
# `df[['FloodProbability']]` produced an (n, 1) DataFrame, which triggers
# shape/ravel warnings in scikit-learn estimators.
y = df['FloodProbability']
print(df)

# Split into training data (80%) and test data (20%):
#   X_train / y_train train the model; X_test / y_test hold the expected results.
# FIX: a fixed random_state makes the split reproducible run-to-run
# (the original split was unseeded).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Keep references to this initial split for later inspection.
X_train_original = X_train
X_test_original = X_test
y_train_original = y_train
y_test_original = y_test
# Display the training features (bare expression -> notebook table).
X_train_original
MonsoonIntensity TopographyDrainage RiverManagement Deforestation \
0 3 8 6 6
1 8 4 5 7
2 3 10 4 1
3 4 4 2 7
4 3 7 5 2
... ... ... ... ...
49995 3 7 4 7
49996 3 10 3 8
49997 4 4 5 7
49998 4 5 4 4
49999 4 5 6 3
Urbanization ClimateChange DamsQuality Siltation \
0 4 4 6 2
1 7 9 1 5
2 7 5 4 7
3 3 4 1 4
4 5 8 5 2
... ... ... ... ...
49995 5 9 4 6
49996 3 3 4 4
49997 2 1 4 5
49998 6 3 10 2
49999 5 6 5 4
AgriculturalPractices Encroachments ... DrainageSystems \
0 3 2 ... 10
1 5 4 ... 9
2 4 9 ... 7
3 6 4 ... 4
4 7 5 ... 7
... ... ... ... ...
49995 10 4 ... 7
49996 3 11 ... 8
49997 6 7 ... 4
49998 6 11 ... 6
49999 9 10 ... 2
CoastalVulnerability Landslides Watersheds \
0 7 4 2
1 2 6 2
2 4 4 8
3 2 6 6
4 6 5 3
... ... ... ...
49995 3 8 8
49996 6 3 6
49997 6 4 1
49998 3 4 7
49999 4 4 5
DeterioratingInfrastructure PopulationScore WetlandLoss \
0 3 4 3
1 1 1 9
2 6 1 8
3 8 8 6
4 3 4 4
... ... ... ...
49995 6 1 5
49996 4 4 2
49997 5 1 6
49998 6 2 4
49999 6 7 8
InadequatePlanning PoliticalFactors FloodProbability
0 2 6 0.450
1 1 3 0.475
2 3 6 0.515
3 6 10 0.520
4 3 4 0.475
... ... ... ...
49995 4 2 0.535
49996 4 5 0.510
49997 4 3 0.430
49998 0 11 0.515
49999 10 7 0.580
[50000 rows x 21 columns]
| MonsoonIntensity | TopographyDrainage | RiverManagement | Deforestation | Urbanization | ClimateChange | DamsQuality | Siltation | AgriculturalPractices | Encroachments | IneffectiveDisasterPreparedness | DrainageSystems | CoastalVulnerability | Landslides | Watersheds | DeterioratingInfrastructure | PopulationScore | WetlandLoss | InadequatePlanning | PoliticalFactors | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 48893 | 6 | 4 | 3 | 6 | 3 | 7 | 6 | 10 | 8 | 4 | 4 | 3 | 3 | 5 | 1 | 6 | 7 | 3 | 0 | 2 |
| 1210 | 7 | 6 | 5 | 3 | 8 | 5 | 4 | 7 | 4 | 1 | 11 | 3 | 7 | 5 | 7 | 2 | 4 | 3 | 5 | 10 |
| 28868 | 2 | 5 | 7 | 5 | 4 | 5 | 5 | 6 | 6 | 8 | 4 | 4 | 6 | 3 | 6 | 2 | 8 | 6 | 5 | 4 |
| 3618 | 6 | 5 | 1 | 6 | 3 | 3 | 9 | 6 | 6 | 4 | 12 | 4 | 6 | 4 | 6 | 5 | 5 | 3 | 4 | 8 |
| 24579 | 7 | 3 | 4 | 3 | 3 | 4 | 5 | 5 | 3 | 4 | 8 | 4 | 2 | 7 | 4 | 7 | 6 | 1 | 3 | 5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10160 | 9 | 8 | 9 | 2 | 6 | 4 | 9 | 6 | 4 | 6 | 3 | 6 | 3 | 2 | 4 | 9 | 6 | 6 | 8 | 5 |
| 8246 | 4 | 4 | 1 | 5 | 5 | 5 | 4 | 7 | 2 | 6 | 6 | 4 | 6 | 5 | 6 | 2 | 3 | 11 | 10 | 3 |
| 26176 | 6 | 3 | 8 | 7 | 5 | 1 | 9 | 7 | 10 | 2 | 8 | 3 | 4 | 4 | 5 | 4 | 1 | 2 | 4 | 5 |
| 4447 | 4 | 4 | 3 | 7 | 5 | 6 | 7 | 6 | 2 | 2 | 8 | 5 | 8 | 8 | 7 | 6 | 2 | 3 | 3 | 5 |
| 3957 | 1 | 4 | 5 | 2 | 6 | 3 | 10 | 6 | 2 | 2 | 5 | 4 | 4 | 6 | 5 | 5 | 3 | 2 | 5 | 2 |
40000 rows × 20 columns
# Horizontal boxplots of every numeric column to surface outliers.
# (`drop('id', errors='ignore')` defensively removes an id column if present.)
selected_attributes = df.select_dtypes(include=['int64', 'float64']).columns.drop('id', errors='ignore')
grph.figure(figsize=(12, 8))
sns.boxplot(data=df[selected_attributes], orient="h")
# FIX: corrected the title typo "Attribuites" -> "Attributes".
grph.title('EDA : Boxplot of Selected Attributes')
grph.xlabel('Magnitude of outcomes')
grph.show()
# Finer-grained histograms (70 bins) for every column.
df.hist(figsize=(20,20),bins=70)
plt.show()
# Pairwise scatter-plot / histogram grid over every column pair.
sns.pairplot(df)
<seaborn.axisgrid.PairGrid at 0x21c360e2550>
# Distribution of the target variable.
df['FloodProbability'].hist()
<Axes: >
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report

# Benchmark several classifiers on FloodProbability binarized at a threshold.
X = df.drop(columns=['FloodProbability'])  # all feature columns
y = df['FloodProbability']

# Turn the continuous probability into a binary label.
threshold = 0.5
y = (y > threshold).astype(int)

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5-fold cross-validated accuracy for each candidate classifier.
candidates = [
    ("Logistic Regression", LogisticRegression()),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Random Forest", RandomForestClassifier()),
]
for label, model in candidates:
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    print(label + " Validation Accuracy:", np.mean(scores) * 100)

# Linear-regression baseline against the binary labels: mean squared error...
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Linear Regression Mean Squared Error:", mse)

from sklearn.metrics import mean_squared_error, r2_score

# ...and R-squared for the same baseline (refit, as in the original cell).
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print("Linear Regression R-squared Score:", r2)
Logistic Regression Validation Accuracy: 100.0 Decision Tree Validation Accuracy: 69.12 Naive Bayes Validation Accuracy: 91.4625 K-Nearest Neighbors Validation Accuracy: 83.93500000000002 Random Forest Validation Accuracy: 89.42 Linear Regression Mean Squared Error: 0.09023106289377088 Linear Regression R-squared Score: 0.6380971631540853
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# Same model line-up as the cross-validation cell, now scored once on the
# held-out 20% test split.
X = df.drop(columns=['FloodProbability'])
y = df['FloodProbability']
threshold = 0.5  # probability cut-off separating class 1 from class 0
y = (y > threshold).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit each classifier on the training split and report its test accuracy.
for label, model in [
    ("Logistic Regression", LogisticRegression()),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Random Forest", RandomForestClassifier()),
]:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(label + " Test Accuracy:", accuracy * 100)

# Linear-regression baseline on the binary labels.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Linear Regression Mean Squared Error:", mse)
print("Linear Regression R-squared Score:", r2)
Logistic Regression Test Accuracy: 100.0 Decision Tree Test Accuracy: 69.19 Naive Bayes Test Accuracy: 91.18 K-Nearest Neighbors Test Accuracy: 84.11999999999999 Random Forest Test Accuracy: 89.29 Linear Regression Mean Squared Error: 0.09023106289377088 Linear Regression R-squared Score: 0.6380971631540853
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Repeat the classifier comparison, this time also drawing a confusion
# matrix for each model.
X = df.drop(columns=['FloodProbability'])
y = df['FloodProbability']
threshold = 0.5
y = (y > threshold).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def plot_confusion_matrix(y_test, y_pred, title):
    """Render one model's 2x2 confusion matrix as an annotated heatmap."""
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['No Flood', 'Flood'], yticklabels=['No Flood', 'Flood'])
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.title(title)
    plt.show()

# Fit, score and plot each classifier in turn.
for title, model in [
    ("Logistic Regression", LogisticRegression()),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Random Forest", RandomForestClassifier()),
]:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(title + " Test Accuracy:", accuracy * 100)
    plot_confusion_matrix(y_test, y_pred, title)

# Regression baseline (no confusion matrix -- continuous predictions).
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Linear Regression Mean Squared Error:", mse)
print("Linear Regression R-squared Score:", r2)
Logistic Regression Test Accuracy: 100.0
Decision Tree Test Accuracy: 69.42
Naive Bayes Test Accuracy: 91.18
K-Nearest Neighbors Test Accuracy: 84.11999999999999
Random Forest Test Accuracy: 89.05999999999999
Linear Regression Mean Squared Error: 0.09023106289377088 Linear Regression R-squared Score: 0.6380971631540853
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, r2_score

# Classifier comparison reporting accuracy, precision, recall and F1.
X = df.drop(columns=['FloodProbability'])
y = df['FloodProbability']
threshold = 0.5
y = (y > threshold).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def evaluate_classification_model(model, X_train, y_train, X_test, y_test):
    """Fit `model` on the training split, then print its test-set
    accuracy, precision, recall and F1-score."""
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    name = model.__class__.__name__
    print(f"{name} Test Accuracy: {accuracy_score(y_test, predictions) * 100:.2f}%")
    print(f"{name} Precision: {precision_score(y_test, predictions):.2f}")
    print(f"{name} Recall: {recall_score(y_test, predictions):.2f}")
    print(f"{name} F1-Score: {f1_score(y_test, predictions):.2f}")

# Evaluate each classifier with the helper above.
for classifier in (LogisticRegression(), DecisionTreeClassifier(), GaussianNB(),
                   KNeighborsClassifier(), RandomForestClassifier()):
    evaluate_classification_model(classifier, X_train, y_train, X_test, y_test)

# Regression baseline on the binary labels.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Linear Regression Mean Squared Error:", mse)
print("Linear Regression R-squared Score:", r2)
LogisticRegression Test Accuracy: 100.00% LogisticRegression Precision: 1.00 LogisticRegression Recall: 1.00 LogisticRegression F1-Score: 1.00 DecisionTreeClassifier Test Accuracy: 69.49% DecisionTreeClassifier Precision: 0.68 DecisionTreeClassifier Recall: 0.67 DecisionTreeClassifier F1-Score: 0.67 GaussianNB Test Accuracy: 91.18% GaussianNB Precision: 0.92 GaussianNB Recall: 0.89 GaussianNB F1-Score: 0.91 KNeighborsClassifier Test Accuracy: 84.12% KNeighborsClassifier Precision: 0.88 KNeighborsClassifier Recall: 0.77 KNeighborsClassifier F1-Score: 0.82 RandomForestClassifier Test Accuracy: 89.41% RandomForestClassifier Precision: 0.93 RandomForestClassifier Recall: 0.84 RandomForestClassifier F1-Score: 0.88 Linear Regression Mean Squared Error: 0.09023106289377088 Linear Regression R-squared Score: 0.6380971631540853
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report
# Same benchmark as above, but with ClimateChange as the target.
# NOTE(review): ClimateChange is an int64 score (see df.info()/df.describe()
# above), not a continuous probability, so the 0.5 threshold below labels
# every row with ClimateChange >= 1 as class 1. The ~99.34% accuracies in
# the output match a heavily imbalanced target -- consider a more meaningful
# cut-off (e.g. the median) or treating this as regression; verify intent.
X = df.drop(columns=['ClimateChange'])  # features: every column except the target
y = df['ClimateChange']
# Binarize the target variable using a threshold
threshold = 0.5  # effectively "ClimateChange > 0" given the integer scale -- confirm
y = (y > threshold).astype(int)
# 80/20 split with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Logistic Regression: 5-fold cross-validated accuracy
model = LogisticRegression()
scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
print("Logistic Regression Validation Accuracy:", np.mean(scores) * 100)
# Decision Tree Classifier
model = DecisionTreeClassifier()
scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
print("Decision Tree Validation Accuracy:", np.mean(scores) * 100)
# Naive Bayes
model = GaussianNB()
scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
print("Naive Bayes Validation Accuracy:", np.mean(scores) * 100)
# K-Nearest Neighbors
model = KNeighborsClassifier()
scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
print("K-Nearest Neighbors Validation Accuracy:", np.mean(scores) * 100)
# Random Forest Classifier
model = RandomForestClassifier()
scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
print("Random Forest Validation Accuracy:", np.mean(scores) * 100)
# Linear Regression baseline against the binarized labels: mean squared error
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Linear Regression Mean Squared Error:", mse)
from sklearn.metrics import mean_squared_error, r2_score
# Linear Regression baseline: R-squared of the refitted model
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# Calculate R-squared Score
r2 = r2_score(y_test, y_pred)
print("Linear Regression R-squared Score:", r2)
Logistic Regression Validation Accuracy: 99.3425 Decision Tree Validation Accuracy: 98.46999999999998 Naive Bayes Validation Accuracy: 99.3425
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\joblib\externals\loky\backend\context.py:136: UserWarning: Could not find the number of physical cores for the following reason:
[WinError 2] The system cannot find the file specified
Returning the number of logical cores instead. You can silence this warning by setting LOKY_MAX_CPU_COUNT to the number of cores you want to use.
warnings.warn(
File "C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
cpu_info = subprocess.run(
^^^^^^^^^^^^^^^
File "C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 548, in run
with Popen(*popenargs, **kwargs) as process:
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1026, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1538, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
K-Nearest Neighbors Validation Accuracy: 99.3425 Random Forest Validation Accuracy: 99.3425 Linear Regression Mean Squared Error: 0.006063152985706354 Linear Regression R-squared Score: 0.03149317115185157
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report, r2_score
# Assume df is your DataFrame and 'ClimateChange' is your continuous target variable
# df = ... # your DataFrame
X = df.drop(columns=['ClimateChange']) # Exclude the target variable from features
y = df['ClimateChange']
# Binarize the target variable using a threshold
threshold = 0.5 # Example threshold; you can adjust this based on your specific needs
y = (y > threshold).astype(int)
# Split the data for numerical features
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Function to print classification report
def print_classification_report(model, X_train, y_train, X_test, y_test):
    """Fit *model* on the training split and print its classification report.

    Parameters
    ----------
    model : fitted-on-call sklearn estimator with fit/predict.
    X_train, y_train : training features / labels.
    X_test, y_test : held-out features / labels the report is computed on.

    ``zero_division=0`` pins precision/recall to 0.0 for classes the model
    never predicts, silencing the UndefinedMetricWarning spam the original
    run produced (the minority class 0 is never predicted by most models).
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Classification Report for {model.__class__.__name__}:\n")
    print(classification_report(y_test, y_pred, zero_division=0))
# Evaluate each classifier: 5-fold cross-validated accuracy on the training
# split, then a classification report on the held-out split.  Looping over a
# dict replaces five copy-pasted fit/score/print stanzas (and matches the
# dict-driven pattern used later in this file); the printed output is
# byte-identical to the original sequence.
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Random Forest": RandomForestClassifier(),
}
for label, clf in classifiers.items():
    scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
    print(f"{label} Validation Accuracy:", np.mean(scores) * 100)
    print_classification_report(clf, X_train, y_train, X_test, y_test)
# Linear-regression baseline on the binarized target, scored with MSE and R^2.
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
predictions = lin_reg.predict(X_test)
print("Linear Regression Mean Squared Error:", mean_squared_error(y_test, predictions))
print("Linear Regression R-squared Score:", r2_score(y_test, predictions))
Logistic Regression Validation Accuracy: 99.3425
Classification Report for LogisticRegression:
precision recall f1-score support
0 0.00 0.00 0.00 63
1 0.99 1.00 1.00 9937
accuracy 0.99 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.99 0.99 10000
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Decision Tree Validation Accuracy: 98.42
Classification Report for DecisionTreeClassifier:
precision recall f1-score support
0 0.01 0.02 0.01 63
1 0.99 0.99 0.99 9937
accuracy 0.98 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.98 0.99 10000
Naive Bayes Validation Accuracy: 99.3425
Classification Report for GaussianNB:
precision recall f1-score support
0 0.00 0.00 0.00 63
1 0.99 1.00 1.00 9937
accuracy 0.99 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.99 0.99 10000
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
K-Nearest Neighbors Validation Accuracy: 99.3425
Classification Report for KNeighborsClassifier:
precision recall f1-score support
0 0.00 0.00 0.00 63
1 0.99 1.00 1.00 9937
accuracy 0.99 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.99 0.99 10000
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Random Forest Validation Accuracy: 99.3425
Classification Report for RandomForestClassifier:
precision recall f1-score support
0 0.00 0.00 0.00 63
1 0.99 1.00 1.00 9937
accuracy 0.99 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.99 0.99 10000
Linear Regression Mean Squared Error: 0.006063152985706354
Linear Regression R-squared Score: 0.03149317115185157
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report, confusion_matrix, r2_score
# Rebuild the same binary problem as above: drop 'ClimateChange' from the
# features and threshold it to form the target. df comes from flood.csv.
X = df.drop(columns=['ClimateChange']) # Exclude the text column from numerical features
y = df['ClimateChange']
# Binarize the target variable using a threshold.
# NOTE(review): with integer ClimateChange values, `> 0.5` yields an extreme
# class imbalance (63 vs 9937 in the test split) -- accuracy alone is not a
# meaningful metric here.
threshold = 0.5 # Example threshold; you can adjust this based on your specific needs
y = (y > threshold).astype(int)
# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Classifiers to compare, keyed by display name (used in prints and plot titles).
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Naive Bayes': GaussianNB(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Random Forest': RandomForestClassifier()
}
# Function to plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, title):
    """Show the confusion matrix for *title* as an annotated blue heatmap."""
    matrix = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    ax = sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', cbar=False)
    ax.set_title(f'Confusion Matrix for {title}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()
# Cross-validate each classifier, then fit it on the training split and
# visualise its held-out confusion matrix.
for model_name, clf in models.items():
    cv_scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
    print(f"{model_name} Validation Accuracy: {np.mean(cv_scores) * 100:.2f}%")
    clf.fit(X_train, y_train)
    plot_confusion_matrix(y_test, clf.predict(X_test), model_name)

# Linear-regression baseline on the same binarized target.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
reg_pred = regressor.predict(X_test)
print("Linear Regression Mean Squared Error:", mean_squared_error(y_test, reg_pred))
print("Linear Regression R-squared Score:", r2_score(y_test, reg_pred))

# Threshold the continuous predictions so a confusion matrix applies.
plot_confusion_matrix(y_test, (reg_pred > threshold).astype(int), "Linear Regression (Binarized)")
Logistic Regression Validation Accuracy: 99.34%
Decision Tree Validation Accuracy: 98.48%
Naive Bayes Validation Accuracy: 99.34%
K-Nearest Neighbors Validation Accuracy: 99.34%
Random Forest Validation Accuracy: 99.34%
Linear Regression Mean Squared Error: 0.006063152985706354 Linear Regression R-squared Score: 0.03149317115185157
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
# Sample dataset: 21 independent uniform[0,1) features, 100 rows.
# A seeded Generator makes the grid-search results below repeatable run to
# run (the original used unseeded np.random.rand, so every run differed).
# NOTE(review): this deliberately REPLACES the flood.csv DataFrame loaded at
# the top of the file with synthetic data -- confirm that is intended.
rng = np.random.default_rng(42)
feature_names = [
    'MonsoonIntensity', 'TopographyDrainage', 'RiverManagement',
    'Deforestation', 'Urbanization', 'ClimateChange', 'DamsQuality',
    'Siltation', 'AgriculturalPractices', 'Encroachments',
    'IneffectiveDisasterPreparedness', 'DrainageSystems',
    'CoastalVulnerability', 'Landslides', 'Watersheds',
    'DeterioratingInfrastructure', 'PopulationScore', 'WetlandLoss',
    'InadequatePlanning', 'PoliticalFactors', 'FloodProbability',
]
# Dict comprehension preserves column order, matching the original layout.
df = pd.DataFrame({name: rng.random(100) for name in feature_names})
# Split the synthetic data into features and the 'FloodProbability' target.
X = df.drop('FloodProbability', axis=1)
y = df['FloodProbability']
# 80/20 train/test split, seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Base regressor; the grid search below tunes its hyperparameters.
model = RandomForestRegressor(random_state=42)
# Hyperparameter grid: 3 * 4 * 3 * 3 = 108 candidates; with cv=5 that is the
# 540 fits reported in the output.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
# Exhaustive 5-fold grid search over all candidates, using every CPU core.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)
# Pull the tuned estimator out of the search and score it on the test split.
tuned = grid_search.best_estimator_
tuned_mse = mean_squared_error(y_test, tuned.predict(X_test))
print(f"Best Mean Squared Error: {tuned_mse}")

# Rank features by the forest's impurity-based importances.
ranking = (
    pd.DataFrame({'Feature': X.columns, 'Importance': tuned.feature_importances_})
    .sort_values(by='Importance', ascending=False)
)
print(ranking)

# Horizontal bar chart of the importance ranking.
plt.figure(figsize=(12, 8))
sns.barplot(x='Importance', y='Feature', data=ranking)
plt.title('Feature Importance')
plt.show()
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Mean Squared Error: 0.09007535191248463
Feature Importance
7 Siltation 0.100154
15 DeterioratingInfrastructure 0.079997
4 Urbanization 0.074658
16 PopulationScore 0.066043
14 Watersheds 0.060470
10 IneffectiveDisasterPreparedness 0.059723
17 WetlandLoss 0.058898
18 InadequatePlanning 0.056367
1 TopographyDrainage 0.051095
9 Encroachments 0.045737
5 ClimateChange 0.043783
13 Landslides 0.042121
6 DamsQuality 0.042053
2 RiverManagement 0.041426
19 PoliticalFactors 0.036528
11 DrainageSystems 0.033825
0 MonsoonIntensity 0.030576
3 Deforestation 0.030089
12 CoastalVulnerability 0.026940
8 AgriculturalPractices 0.019517
import matplotlib.pyplot as plt

# Scatter MonsoonIntensity vs TopographyDrainage, coloured by FloodProbability.
# Bug fixed: the original looped over every unique FloodProbability value and
# called plt.scatter once per group with c= that group's (constant) values --
# matplotlib normalizes the colormap PER CALL, so each group got a degenerate
# colour scale and the colours were not comparable across groups.  A single
# vectorized scatter normalizes over the whole column.  The per-group `label`
# was never shown (no plt.legend() call), and the `colors` list was unused,
# so both are dropped.
plt.scatter(df['MonsoonIntensity'], df['TopographyDrainage'],
            c=df['FloodProbability'], cmap='viridis')
plt.xlabel("Monsoon Intensity")
plt.ylabel("Topography Drainage")
plt.colorbar(label='Flood Probability') # Add a color bar to indicate flood probability
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns

# Bar plot of MonsoonIntensity grouped by TopographyDrainage level.
plt.figure(figsize=(12,8))
# `ci=None` is deprecated in recent seaborn (it raised the FutureWarning seen
# in the original run); `errorbar=None` disables error bars the same way.
sns.barplot(x='MonsoonIntensity', hue='TopographyDrainage', data=df, errorbar=None)
plt.title('Bar Plot of MonsoonIntensity by TopographyDrainage')
plt.xlabel('Monsoon Intensity')
# NOTE(review): barplot draws means, not counts -- this y-label may mislead;
# confirm the intended metric.
plt.ylabel('Count')
plt.legend(title='Topography Drainage')
plt.show()
C:\Users\ibmuser\AppData\Local\Temp\ipykernel_3156\2048272625.py:5: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. sns.barplot(x='MonsoonIntensity', hue='TopographyDrainage', data=df, ci=None)
pip install pydot
Requirement already satisfied: pydot in c:\users\ibmuser\appdata\local\programs\python\python311\lib\site-packages (3.0.1) Requirement already satisfied: pyparsing>=3.0.9 in c:\users\ibmuser\appdata\local\programs\python\python311\lib\site-packages (from pydot) (3.1.1) Note: you may need to restart the kernel to use updated packages.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model
# Same binary setup as the earlier cells: drop 'ClimateChange' from the
# features and threshold it into a 0/1 target.
X = df.drop(columns=['ClimateChange']) # Exclude the target variable from features
y = df['ClimateChange']
# Binarize the target variable using a threshold.
# NOTE(review): `> 0.5` on an integer-valued column labels nearly every row 1
# (63 vs 9937 in the test split per the output) -- the high accuracies below
# mostly reflect this imbalance.
threshold = 0.5 # Example threshold; you can adjust this based on your specific needs
y = (y > threshold).astype(int)
# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Function to print classification report
def print_classification_report(model, X_train, y_train, X_test, y_test):
    """Fit *model* on the training split and print its classification report.

    ``zero_division=0`` pins precision/recall to 0.0 for classes the model
    never predicts, which silences the UndefinedMetricWarning spam this
    notebook's original run produced for the never-predicted class 0.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Classification Report for {model.__class__.__name__}:\n")
    print(classification_report(y_test, y_pred, zero_division=0))
# Evaluate each classifier: 5-fold cross-validated accuracy, then a report on
# the held-out split.  The dict loop replaces five duplicated fit/score/print
# stanzas while producing byte-identical console output.
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Random Forest": RandomForestClassifier(),
}
for label, clf in classifiers.items():
    scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
    print(f"{label} Validation Accuracy:", np.mean(scores) * 100)
    print_classification_report(clf, X_train, y_train, X_test, y_test)
# Linear-regression baseline: fit on the training split, report MSE and R^2
# on the held-out split.
baseline = LinearRegression()
baseline.fit(X_train, y_train)
baseline_pred = baseline.predict(X_test)
print("Linear Regression Mean Squared Error:", mean_squared_error(y_test, baseline_pred))
print("Linear Regression R-squared Score:", r2_score(y_test, baseline_pred))
# Small fully-connected binary classifier: 32 -> 16 -> 1(sigmoid).
nn_model = Sequential([
    Dense(32, input_dim=X_train.shape[1], activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train for 50 epochs, tracking the test split as validation data.
history = nn_model.fit(X_train, y_train, epochs=50, batch_size=10,
                       validation_data=(X_test, y_test))
# Final held-out evaluation.
loss, accuracy = nn_model.evaluate(X_test, y_test)
print(f"Neural Network Accuracy: {accuracy * 100}")
# Save a diagram of the architecture (requires pydot, installed above).
plot_model(nn_model, to_file='model.png', show_shapes=True, show_layer_names=True)
# Learning curves: one figure each for accuracy and loss, train vs. test.
import matplotlib.pyplot as plt

for metric, fig_title, y_label in (('accuracy', 'Model accuracy', 'Accuracy'),
                                   ('loss', 'Model loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history[f'val_{metric}'])
    plt.title(fig_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()
Logistic Regression Validation Accuracy: 99.3425
Classification Report for LogisticRegression:
precision recall f1-score support
0 0.00 0.00 0.00 63
1 0.99 1.00 1.00 9937
accuracy 0.99 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.99 0.99 10000
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Decision Tree Validation Accuracy: 98.435
Classification Report for DecisionTreeClassifier:
precision recall f1-score support
0 0.01 0.02 0.01 63
1 0.99 0.99 0.99 9937
accuracy 0.98 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.98 0.99 10000
Naive Bayes Validation Accuracy: 99.3425
Classification Report for GaussianNB:
precision recall f1-score support
0 0.00 0.00 0.00 63
1 0.99 1.00 1.00 9937
accuracy 0.99 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.99 0.99 10000
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
K-Nearest Neighbors Validation Accuracy: 99.3425
Classification Report for KNeighborsClassifier:
precision recall f1-score support
0 0.00 0.00 0.00 63
1 0.99 1.00 1.00 9937
accuracy 0.99 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.99 0.99 10000
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Random Forest Validation Accuracy: 99.3425
Classification Report for RandomForestClassifier:
precision recall f1-score support
0 0.00 0.00 0.00 63
1 0.99 1.00 1.00 9937
accuracy 0.99 10000
macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 0.99 0.99 10000
Linear Regression Mean Squared Error: 0.006063152985706354
Linear Regression R-squared Score: 0.03149317115185157
Epoch 1/50
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\ibmuser\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\layers\core\dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
super().__init__(activity_regularizer=activity_regularizer, **kwargs)
4000/4000 ━━━━━━━━━━━━━━━━━━━━ 5s 998us/step - accuracy: 0.9898 - loss: 0.0514 - val_accuracy: 0.9937 - val_loss: 0.0385 Epoch 2/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 902us/step - accuracy: 0.9935 - loss: 0.0414 - val_accuracy: 0.9937 - val_loss: 0.0406 Epoch 3/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 895us/step - accuracy: 0.9937 - loss: 0.0403 - val_accuracy: 0.9937 - val_loss: 0.0400 Epoch 4/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 882us/step - accuracy: 0.9937 - loss: 0.0399 - val_accuracy: 0.9937 - val_loss: 0.0389 Epoch 5/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 901us/step - accuracy: 0.9925 - loss: 0.0457 - val_accuracy: 0.9937 - val_loss: 0.0392 Epoch 6/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 948us/step - accuracy: 0.9936 - loss: 0.0409 - val_accuracy: 0.9937 - val_loss: 0.0433 Epoch 7/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 878us/step - accuracy: 0.9933 - loss: 0.0423 - val_accuracy: 0.9937 - val_loss: 0.0395 Epoch 8/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 898us/step - accuracy: 0.9939 - loss: 0.0385 - val_accuracy: 0.9937 - val_loss: 0.0385 Epoch 9/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 937us/step - accuracy: 0.9927 - loss: 0.0438 - val_accuracy: 0.9937 - val_loss: 0.0384 Epoch 10/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 919us/step - accuracy: 0.9933 - loss: 0.0411 - val_accuracy: 0.9937 - val_loss: 0.0409 Epoch 11/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 883us/step - accuracy: 0.9933 - loss: 0.0408 - val_accuracy: 0.9937 - val_loss: 0.0420 Epoch 12/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 951us/step - accuracy: 0.9935 - loss: 0.0407 - val_accuracy: 0.9937 - val_loss: 0.0424 Epoch 13/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 956us/step - accuracy: 0.9939 - loss: 0.0376 - val_accuracy: 0.9937 - val_loss: 0.0384 Epoch 14/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 942us/step - accuracy: 0.9930 - loss: 0.0428 - val_accuracy: 0.9937 - val_loss: 0.0383 Epoch 15/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 1ms/step - accuracy: 0.9936 - loss: 0.0394 - val_accuracy: 0.9937 - val_loss: 0.0383 Epoch 16/50 4000/4000 
━━━━━━━━━━━━━━━━━━━━ 5s 1ms/step - accuracy: 0.9941 - loss: 0.0368 - val_accuracy: 0.9937 - val_loss: 0.0388 Epoch 17/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 1ms/step - accuracy: 0.9931 - loss: 0.0411 - val_accuracy: 0.9937 - val_loss: 0.0397 Epoch 18/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 1ms/step - accuracy: 0.9931 - loss: 0.0412 - val_accuracy: 0.9937 - val_loss: 0.0392 Epoch 19/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 971us/step - accuracy: 0.9930 - loss: 0.0421 - val_accuracy: 0.9937 - val_loss: 0.0389 Epoch 20/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 904us/step - accuracy: 0.9930 - loss: 0.0421 - val_accuracy: 0.9937 - val_loss: 0.0400 Epoch 21/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 904us/step - accuracy: 0.9936 - loss: 0.0391 - val_accuracy: 0.9937 - val_loss: 0.0409 Epoch 22/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 902us/step - accuracy: 0.9935 - loss: 0.0391 - val_accuracy: 0.9937 - val_loss: 0.0390 Epoch 23/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 3s 862us/step - accuracy: 0.9934 - loss: 0.0396 - val_accuracy: 0.9937 - val_loss: 0.0389 Epoch 24/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 962us/step - accuracy: 0.9936 - loss: 0.0382 - val_accuracy: 0.9937 - val_loss: 0.0407 Epoch 25/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 939us/step - accuracy: 0.9935 - loss: 0.0388 - val_accuracy: 0.9937 - val_loss: 0.0404 Epoch 26/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 901us/step - accuracy: 0.9940 - loss: 0.0363 - val_accuracy: 0.9937 - val_loss: 0.0435 Epoch 27/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 984us/step - accuracy: 0.9936 - loss: 0.0387 - val_accuracy: 0.9937 - val_loss: 0.0390 Epoch 28/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 999us/step - accuracy: 0.9926 - loss: 0.0426 - val_accuracy: 0.9937 - val_loss: 0.0387 Epoch 29/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 958us/step - accuracy: 0.9940 - loss: 0.0363 - val_accuracy: 0.9937 - val_loss: 0.0394 Epoch 30/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 972us/step - accuracy: 0.9933 - loss: 0.0393 - val_accuracy: 0.9937 - val_loss: 0.0397 Epoch 31/50 4000/4000 
━━━━━━━━━━━━━━━━━━━━ 4s 958us/step - accuracy: 0.9937 - loss: 0.0379 - val_accuracy: 0.9937 - val_loss: 0.0407 Epoch 32/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 981us/step - accuracy: 0.9930 - loss: 0.0408 - val_accuracy: 0.9937 - val_loss: 0.0412 Epoch 33/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 984us/step - accuracy: 0.9934 - loss: 0.0389 - val_accuracy: 0.9937 - val_loss: 0.0391 Epoch 34/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 984us/step - accuracy: 0.9932 - loss: 0.0401 - val_accuracy: 0.9937 - val_loss: 0.0412 Epoch 35/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 939us/step - accuracy: 0.9941 - loss: 0.0357 - val_accuracy: 0.9937 - val_loss: 0.0399 Epoch 36/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 954us/step - accuracy: 0.9932 - loss: 0.0394 - val_accuracy: 0.9937 - val_loss: 0.0411 Epoch 37/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 990us/step - accuracy: 0.9932 - loss: 0.0394 - val_accuracy: 0.9937 - val_loss: 0.0391 Epoch 38/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 947us/step - accuracy: 0.9935 - loss: 0.0382 - val_accuracy: 0.9937 - val_loss: 0.0396 Epoch 39/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 913us/step - accuracy: 0.9934 - loss: 0.0382 - val_accuracy: 0.9937 - val_loss: 0.0405 Epoch 40/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 984us/step - accuracy: 0.9936 - loss: 0.0373 - val_accuracy: 0.9937 - val_loss: 0.0404 Epoch 41/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 921us/step - accuracy: 0.9939 - loss: 0.0358 - val_accuracy: 0.9937 - val_loss: 0.0393 Epoch 42/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 975us/step - accuracy: 0.9927 - loss: 0.0415 - val_accuracy: 0.9937 - val_loss: 0.0411 Epoch 43/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 982us/step - accuracy: 0.9932 - loss: 0.0397 - val_accuracy: 0.9937 - val_loss: 0.0437 Epoch 44/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 915us/step - accuracy: 0.9929 - loss: 0.0406 - val_accuracy: 0.9937 - val_loss: 0.0443 Epoch 45/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 982us/step - accuracy: 0.9934 - loss: 0.0387 - val_accuracy: 0.9937 - val_loss: 0.0415 Epoch 46/50 4000/4000 
━━━━━━━━━━━━━━━━━━━━ 4s 1ms/step - accuracy: 0.9936 - loss: 0.0375 - val_accuracy: 0.9937 - val_loss: 0.0407 Epoch 47/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 977us/step - accuracy: 0.9932 - loss: 0.0390 - val_accuracy: 0.9937 - val_loss: 0.0404 Epoch 48/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 933us/step - accuracy: 0.9928 - loss: 0.0409 - val_accuracy: 0.9937 - val_loss: 0.0461 Epoch 49/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 911us/step - accuracy: 0.9933 - loss: 0.0376 - val_accuracy: 0.9937 - val_loss: 0.0396 Epoch 50/50 4000/4000 ━━━━━━━━━━━━━━━━━━━━ 4s 901us/step - accuracy: 0.9939 - loss: 0.0359 - val_accuracy: 0.9937 - val_loss: 0.0424 313/313 ━━━━━━━━━━━━━━━━━━━━ 0s 685us/step - accuracy: 0.9941 - loss: 0.0391 Neural Network Accuracy: 99.37000274658203
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pydot\core.py:1799, in Dot.create(self, prog, format, encoding) 1798 try: -> 1799 stdout_data, stderr_data, process = call_graphviz( 1800 program=prog, 1801 arguments=arguments, 1802 working_dir=tmp_dir, 1803 ) 1804 except OSError as e: File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pydot\core.py:222, in call_graphviz(program, arguments, working_dir, **kwargs) 220 program_with_args = [program] + arguments --> 222 process = subprocess.Popen( 223 program_with_args, 224 env=env, 225 cwd=working_dir, 226 shell=False, 227 stderr=subprocess.PIPE, 228 stdout=subprocess.PIPE, 229 **kwargs, 230 ) 231 stdout_data, stderr_data = process.communicate() File ~\AppData\Local\Programs\Python\Python311\Lib\subprocess.py:1026, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group) 1023 self.stderr = io.TextIOWrapper(self.stderr, 1024 encoding=encoding, errors=errors) -> 1026 self._execute_child(args, executable, preexec_fn, close_fds, 1027 pass_fds, cwd, env, 1028 startupinfo, creationflags, shell, 1029 p2cread, p2cwrite, 1030 c2pread, c2pwrite, 1031 errread, errwrite, 1032 restore_signals, 1033 gid, gids, uid, umask, 1034 start_new_session, process_group) 1035 except: 1036 # Cleanup if the child failed starting. 
File ~\AppData\Local\Programs\Python\Python311\Lib\subprocess.py:1538, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group) 1537 try: -> 1538 hp, ht, pid, tid = _winapi.CreateProcess(executable, args, 1539 # no special security 1540 None, None, 1541 int(not close_fds), 1542 creationflags, 1543 env, 1544 cwd, 1545 startupinfo) 1546 finally: 1547 # Child is launched. Close the parent's copy of those pipe 1548 # handles that only the child should have open. You need (...) 1551 # pipe will not close when the child process exits and the 1552 # ReadFile will hang. FileNotFoundError: [WinError 2] The system cannot find the file specified During handling of the above exception, another exception occurred: FileNotFoundError Traceback (most recent call last) File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\model_visualization.py:37, in check_graphviz() 34 try: 35 # Attempt to create an image of a blank graph 36 # to check the pydot/graphviz installation. ---> 37 pydot.Dot.create(pydot.Dot()) 38 return True File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pydot\core.py:1808, in Dot.create(self, prog, format, encoding) 1807 args[1] = f'"{prog}" not found in path.' -> 1808 raise OSError(*args) 1809 else: FileNotFoundError: [WinError 2] "dot" not found in path. 
During handling of the above exception, another exception occurred: AttributeError Traceback (most recent call last) Cell In[30], line 91 88 print(f"Neural Network Accuracy: {accuracy * 100}") 90 # Plot model architecture ---> 91 plot_model(nn_model, to_file='model.png', show_shapes=True, show_layer_names=True) 93 # Plot training & validation accuracy values 94 import matplotlib.pyplot as plt File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\model_visualization.py:430, in plot_model(model, to_file, show_shapes, show_dtype, show_layer_names, rankdir, expand_nested, dpi, show_layer_activations, show_trainable, **kwargs) 428 else: 429 raise ImportError(message) --> 430 if not check_graphviz(): 431 message = ( 432 "You must install graphviz " 433 "(see instructions at https://graphviz.gitlab.io/download/) " 434 "for `plot_model` to work." 435 ) 436 if "IPython.core.magics.namespace" in sys.modules: 437 # We don't raise an exception here in order to avoid crashing 438 # notebook tests where graphviz is not available. File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\model_visualization.py:39, in check_graphviz() 37 pydot.Dot.create(pydot.Dot()) 38 return True ---> 39 except (OSError, pydot.InvocationException): 40 return False AttributeError: module 'pydot' has no attribute 'InvocationException'